
%MACRO missing_stage1 (in,out);

*****************************************************************************
* Program: MISSING_STAGE1					   						
* 																			
* Description: Performs stochastic regression to impute missing discharge
*				dates and missing baseline outcome measurement dates for CDDC 
*				participants. Locates auxillary variables to use in the 
*				imputation modeling. Generates sensitivity data for each 
*				imputation case. Recombines data in master dataset, prints
*				summaries of imputation result. Recomputes times vars based on 
*				imputed discharge date (for VTC group) and computes/prints
*				freqs of number of observed outcome values.
*						
* Last modified: July 27, 2015												
* Created by: Martin Wegman													
*****************************************************************************;

%let date = %sysfunc(date(),DATE7.);
ODS ESCAPECHAR="^";
options nodate nonumber;
ods noproctitle;

%let dat = data;

%let v = 15;

data loc.&dat.&v;
	set loc.&in;
run;

****************************************************************************;
****************************************************************************;
****************           Examine distributions              **************;
****************************************************************************;
****************************************************************************;



/*
%let opi_list =  	Y0_opi Y1_opi Y2_opi Y3_opi 
					Y4_opi Y6_opi Y9_opi Y12_opi Y15_opi;

%let T_list = T0 T1 T2 T3 T4 T6 T9 T12 T15;

* Remember, different origins:
	For CDDC, origin is DOR. For VTC, origin is MMT Start date.;

proc means data = loc.&dat.&v N NMISS MEAN STD MIN Q1 MEDIAN Q3 MAX fw=5;
	class site;
	var T0 T1 T2 T3 T4 T6 T9 T12 T15 T_dis T_MMT T_enroll;
run;

* Re-examine missing values to get a sense of which vars should be included in imputation;
* Disclude vars with substantial missingness from being auxiallary vars;

proc format;
	value miss_ 	. 		= missing
					other	= not missing;
run;

proc freq data = loc.&dat.&v;
	where  site = 2;
	tables T_dis so_rec_d so_ts_d ss_fri cs01_craving
			dm3_m rbd1 daily_heroin lifetime_moreuse dast_cat
			dm6a_m dm7 dm8 ih04_agefirsttime ih06_sqrt 
			RA frequent_heroin frequent_moreuse 
			recent_moreuse recent_stimulants du15_choice_m/missing;
	format T_dis so_rec_d so_ts_d ss_fri cs01_craving
			dm3_m rbd1 daily_heroin lifetime_moreuse dast_cat
			dm6a_m dm7 dm8 ih04_agefirsttime ih06_sqrt 
			RA frequent_heroin frequent_moreuse 
			recent_moreuse recent_stimulants du15_choice_m miss_.;
run;

*/
****************************************************************************;
****************************************************************************;
****   Vars for consideration in imputation and analytic models    *********;
****************************************************************************;
****************************************************************************;

%let model_num = 	dm0
					ih01_sqrt
					ih02_log
					ih05_totaltimes 
					du16_log 
					du02b_heroin_years 
					cs01_craving
					so_rec_d so_amb_d so_ts_d
					ss_so ss_fam ss_fri;

%let maybe_num =  	dm6a_m 
					dm6b_m 
					dm6_m 
					dm7 
					dm8
					ih03_totalmonths 
					ih04_agefirsttime
					ih06_sqrt;

%let model_discrim = dast_cat_m dm2_m;

%let model_logistic = 
					dm3_m
					dm4_m
					dm5_m 
					dt01 
					rbd1
					daily_heroin  
					lifetime_alcohol 
					lifetime_other_opiates 
					lifetime_benzos 
					lifetime_stimulants 
					lifetime_moreuse;

%let maybe_char =	rbs1a  id5_cnc 
					du15_choice_m
					dt01b
					he01 
					he02 
					lifetime_polyuse
					lifetime_thc 
					recent_polyuse 
					recent_heroin 
					recent_other_opiates 
					recent_benzos 
					recent_stimulants
					recent_moreuse  
					frequent_moreuse
					frequent_heroin;

%let maybe_discrim = he03a RA;

****************************************************************************;
****************************************************************************;
***********      Find auxillary variables for T_dis           **************;
****************************************************************************;
****************************************************************************;

* Create missing indicators for vars of interest;

%let v = 15;
%let v2 = %eval(&v + 1);

data loc.&dat.&v2;
	set loc.&dat.&v;

	if T_dis = . then miss_dis = 1;
	else miss_dis = 0;

	if T0 = . then miss_T0 = 1;
	else miss_T0 = 0;
run;


%let v = 16;

/*
* Rates of missingess for miss_dis;
proc freq data = loc.&dat.&v;
	where site = 2;
	tables miss_dis;
run;

* Examine associations with missingness on T_dis;
proc ttest data = loc.&dat.&v;
	class miss_dis;
	where  site = 2;
	var T_mmt T0 T1 T2 T3 T4 T6 T9 T12 &model_num &maybe_num;
run;
quit;


proc corr data = loc.&dat.&v;
	where site = 2;
	var T_dis T_mmt T0-T4 T6 T9 T12 &model_num &maybe_num;
run;
quit;
*/

/*Significant assn with missing indicator;
cs01_craving (higher/greater missing)
so_rec_d (lower/greater missing)
so_ts_d (lower/greater missing)
ss_fri (higher/greater missing)

T_mmt, T0, T6

dm6a_m (higher/greater missing)
dm7 (higher//greater missing)
dm8 (lower/greater missing)
ih04_agefirsttime (younger/more missing) (marginal)
ih06_sqrt (fewer month in Puspen/more missing)

Significant correlation (>.3) with missing var:
cs01_craving
dm6a_m
T4 T6 T9 
*/

/*
proc freq data = loc.&dat.&v;
	where site = 2;
	tables miss_dis*(&model_logistic &model_discrim)/chisq;
run;

proc freq data = loc.&dat.&v;
	where site = 2;
	tables miss_dis*(&maybe_char Y:)/chisq;
run;

%MACRO TTESTING (ind,var,site);
	%let i = 1; 
	%do %while(("%scan(&ind,&i)" NE ""));
		%let temp = %scan(&ind,&i);
		%let i = %eval(&i + 1);
		proc ttest data = loc.&dat.&v;
			class &temp;
			where site = &site;
			var &var;
		run;
	%end;
		quit;
%MEND;

%let site = 2;
%let var = T_dis;
%let opi_list =  	Y0_opi Y1_opi Y2_opi Y3_opi 
					Y4_opi Y6_opi Y9_opi Y12_opi Y15_opi;
%let amp_list =  	Y0_amp Y1_amp Y2_amp Y3_amp 
					Y4_amp Y6_amp Y9_amp Y12_amp Y15_amp;
%let mtd_list =  	Y0_mtd Y1_mtd Y2_mtd Y3_mtd 
					Y4_mtd Y6_mtd Y9_mtd Y12_mtd Y15_mtd;
%let bzd_list =  	Y0_bzd Y1_bzd Y2_bzd Y3_bzd 
					Y4_bzd Y6_bzd Y9_bzd Y12_bzd Y15_bzd;

%TTESTING(&model_logistic, &var,&site);
%TTESTING(&maybe_char, &var,&site);
%TTESTING(&opi_list, &var,&site);
%TTESTING(&amp_list, &var,&site);
%TTESTING(&mtd_list, &var,&site);
%TTESTING(&bzd_list, &var,&site);

%MACRO ANOVA (ind,var,site);
	%let i = 1; 
	%do %while(("%scan(&ind,&i)" NE ""));
		%let temp = %scan(&ind,&i);
		%let i = %eval(&i + 1);
		proc anova data = loc.&dat.&v;
			class &temp;
			where site = &site;
			model &var = &temp;
			means &temp;
		run;
	%end;
		quit;
%MEND;

%let site = 2;
%let var = T_dis;
%ANOVA(&model_discrim,&var,&site);
%ANOVA(&maybe_discrim,&var,&site);
*/

/*
Significant assn with missing indicator:
rbd1 (no injection/greater missing)
daily_heroin (no daily heroin/greater missing)
lifetime_moreuse (no poly use/greater missing)
dast_cat
lifetime_other_opiates (marginal)
lifetime_stimulants (marginal)

Y15_mtd
Y9_mtd
Y0_bzd

RA
frequent_heroin
frequent_moreuse
recent_moreuse
recent_stimulants
du15_choice_m
he01 (marginal)

Significant assn with T-Dis:
dm3_m
Y1_opi
Y9_mtd
Y9_amp

*/

****************************************************************************;
****************************************************************************;
****************      Add missing flag for other vars          **************;
****************************************************************************;
****************************************************************************;

* Only want to impute T_dis here, other vars will be imputed in the process, 
	want to flag these values and remove, so that they can be addressed 
	with more optimal model in stage 2.;

%MACRO add_miss_ind(dsin,dsout,num_list,char_list);

	%let i = 1; 
	
	%do %while(("%scan(&num_list,&i)" NE ""));
		%let miss_num = &miss_num m_%scan(&num_list,&i);
		%let i = %eval(&i + 1);
	%end;

	%let i = 1;
	
	%do %while(("%scan(&char_list,&i)" NE ""));
		%let miss_char = &miss_char m_%scan(&char_list,&i);
		%let i = %eval(&i + 1);
	%end;

	%let v2 = %eval(&v + 1);

	data &dsout;
		set &dsin;

		array x &num_list;

		array y &char_list;

		array a &miss_num;

		array b &miss_char;

		do over x;
			if x = . then a = 1;
			else a = 0;
		end;

		do over y;
			if y = . then b = 1;
			else b = 0;
		end;

	run;

%MEND;

%let num_list = 		dm0
						ih01_sqrt
						ih02_log
						du16_log 
						du02b_heroin_years 
						cs01_craving
						so_rec_d so_amb_d so_ts_d
						ss_so ss_fam ss_fri
						T_mmt T0 Y0_bzd Y1_opi;	

%let char_list = 		dm3_m
						dm4_m
						dm5_m 
						rbd1
						daily_heroin
						lifetime_stimulants 
						lifetime_moreuse
						dast_cat_m dm2_m;

						 								
%let miss_num =;
%let miss_char =;
%let v = 16;
%let dsin = loc.&dat.&v;
%let dsout = loc.&dat.&v._c;

%add_miss_ind(&dsin,&dsout,&num_list,&char_list);

****************************************************************************;
****************************************************************************;
****************       IMPUTE Missing T_dis for VTC           **************;
****************************************************************************;
****************************************************************************;

* Significant auxillary vars plus expected model predictors
	did not fit - attempt to reduce  number of vars;

/*
* Look again rates of missingess;
proc means data = loc.&dat.&v._c NMISS;
where site = 2;
	var 	&model_num &model_logistic &model_discrim 
			dm6a_m dm7 dm8 ih04_agefirsttime ih06_sqrt 
			RA recent_moreuse recent_stimulants du15_choice_m he01 T_mmt 
			T0 Y0_bzd Y1_opi T4 T6 T9 Y9_mtd Y9_amp Y15_mtd T_dis;
run;

* keep Y1_opi (even though miss = 11), because such a strong predictor;
* remove dm8 ih06_sqrt (auxiallary vars) and dt01 ih05 (model vars) due to excess missing;
* remove lifetime moreuse/stimulants (model vars) and freq heroin use/more use (aux vars) due to overlap;
* remove T4 T6 T9 Y9_mtd Y9_amp Y15_mtd b/c of substantial missing;

%let model_num = 	dm0
					ih01_sqrt
					ih02_log
					du16_log 
					du02b_heroin_years 
					cs01_craving
					so_rec_d so_amb_d so_ts_d
					ss_so ss_fam ss_fri;

%let model_logistic = 
					dm3_m
					dm4_m
					dm5_m 
					rbd1
					daily_heroin  
					lifetime_alcohol 
					lifetime_other_opiates 
					lifetime_benzos;

%let model_discrim = dast_cat_m dm2_m;

proc mi nimpute=1 data = loc.&dat.&v._c out = loc.&dat.&v._cI1a seed=200;
	where site = 2;
	class 	&model_discrim &model_logistic 
			RA
			recent_moreuse
			recent_stimulants
			Y0_bzd Y1_opi ;
	fcs nbiter = 100 
	regpmm(T_dis) 
	discrim(&model_discrim 
			&model_logistic 
			recent_moreuse
			recent_stimulants /CLASSEFFECTS=INCLUDE)
	plots=trace(mean(T_dis) std(T_dis));
	var   	T_dis &model_num &model_logistic &model_discrim 
			dm6a_m dm7 
			RA
			recent_moreuse
			recent_stimulants 
			T_mmt T0 Y0_bzd Y1_opi; 
run; 

* Model did not fit/same result when Y1_opi is removed;

* Attempt with just those vars which are expected model predictors;

%let model_num = 		dm0
						ih01_sqrt
						ih02_log
						ih05_totaltimes 
						du16_log 
						du02b_heroin_years 
						cs01_craving
						so_rec_d so_amb_d so_ts_d
						ss_so ss_fam ss_fri;

%let model_discrim = 	dast_cat_m dm2_m;

%let model_logistic = 	dm3_m
						dm4_m
						dm5_m 
						dt01 
						rbd1
						daily_heroin  
						lifetime_alcohol 
						lifetime_other_opiates 
						lifetime_benzos 
						lifetime_stimulants 
						lifetime_moreuse;

proc mi nimpute=1 data = loc.&dat.&v._c out = loc.&dat.&v._cI1a seed=200;
	where site = 2;
	class &model_discrim &model_logistic ;
	fcs nbiter = 1
	regpmm(T_dis) 
	discrim(&model_discrim &model_logistic/CLASSEFFECTS=INCLUDE) 
	plots=trace(mean(T_dis) std(T_dis));
	var  T_dis &model_num &model_logistic &model_discrim; 
run; 

* Model did not fit;

* Attempt with just those vars which are expected model predictors;
* Disclude vars with substanial missingness (dt01 ih05);

%let model_num = 		dm0
						ih01_sqrt
						ih02_log
						du16_log 
						du02b_heroin_years 
						cs01_craving
						so_rec_d so_amb_d so_ts_d
						ss_so ss_fam ss_fri;

%let model_discrim = 	dast_cat_m dm2_m;

%let model_logistic = 	dm3_m
						dm4_m
						dm5_m 
						rbd1
						daily_heroin  
						lifetime_alcohol 
						lifetime_other_opiates 
						lifetime_benzos 
						lifetime_stimulants 
						lifetime_moreuse;

proc mi nimpute=20 data = loc.&dat.&v._c out = loc.&dat.&v._cI1a seed=200;
	where site = 2;
	class &model_discrim &model_logistic ;
	fcs nbiter = 100
	regpmm(T_dis) 
	discrim(&model_discrim &model_logistic/CLASSEFFECTS=INCLUDE) 
	plots=trace(mean(T_dis) std(T_dis));
	var  T_dis &model_num &model_logistic &model_discrim; 
run; 

* Model fits;
*/
* Attempt with expected model predictors and a few auxillary vars;
* Disclude vars with substanial missingness (dt01 ih05);
* Disclude some lifetime drug use vars to free up dfs for auxillary vars;

%let model_num = 		dm0
						ih01_sqrt
						ih02_log
						du16_log 
						du02b_heroin_years 
						cs01_craving
						so_rec_d so_amb_d so_ts_d
						ss_so ss_fam ss_fri;

%let model_discrim = 	dast_cat_m dm2_m;

%let model_logistic = 	dm3_m
						dm4_m
						dm5_m 
						rbd1
						daily_heroin
						lifetime_stimulants 
						lifetime_moreuse;

******************** Final Imputation Model for T_dis  **************************;

proc mi nimpute=1 data = loc.&dat.&v._c out = loc.&dat.&v._cI1 seed=200;
	where site = 2;
	class &model_discrim &model_logistic Y0_bzd Y1_opi;
	fcs nbiter = 100
	regpmm(T_dis) 
	discrim(&model_discrim &model_logistic Y0_bzd Y1_opi/CLASSEFFECTS=INCLUDE);
	var  T_dis &model_num &model_logistic &model_discrim T_mmt T0 Y0_bzd Y1_opi; 
run; 


****************************************************************************;
****************************************************************************;
**********    Remove imputed values which were flagged          ************;
****************************************************************************;
****************************************************************************;

* Remove imputed values for all but T_dis;

%MACRO make_missing(dsin,dsout,num_list,char_list);

	%let v2 = %eval(&v + 1);

	data &dsout;
		set &dsin;

		array x &num_list;

		array y &char_list;

		array a &miss_num;

		array b &miss_char;

		do over a;
			if a = 1 then x =.;
		end;

		do over b;
			if b = 1 then y =.;
		end;

	run;

%MEND;

%let v = 16;
%let dsin = loc.&dat.&v._cI1;
%let dsout = loc.&dat.&v._cI1m;

%make_missing(&dsin,&dsout,&num_list,&char_list);

* Confirm that imputed values were set to misssing again;
/*
proc means data = loc.&dat.&v._cI1m N NMISS MEAN STD MIN Q1 MEDIAN Q3 MAX fw=5;
var &num_list &char_list;
run;
*/

****************************************************************************;
****************************************************************************;
*************      Sensitivity data for T_dis imputation      **************;
****************************************************************************;
****************************************************************************;
	
* Sensitivity case where T_dis is 90 days after start of MMT;

data loc.&dat.&v._cI2m;
	set loc.&dat.&v._c;
	where site = 2;

	if T_dis = . then T_dis = 90;

run;

* Sensitivity case where T_dis is 15 days after start of MMT;

data loc.&dat.&v._cI3m;
	set loc.&dat.&v._c;
	where site = 2;

	if T_dis = . then T_dis = 30;

run;

****************************************************************************;
****************************************************************************;
*************       Print summary for  T_dis imputation       **************;
****************************************************************************;
****************************************************************************;

proc means data = loc.&dat.&v._cI1m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_dis;
	var T_dis;
	output out=temp1 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

proc means data = loc.&dat.&v._cI2m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_dis;
	var T_dis;
	output out=temp2 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

proc means data = loc.&dat.&v._cI3m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_dis;
	var T_dis;
	output out=temp3 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

data tempall;
	length type $30.;
	set temp1 (in = in1)
		temp2 (in = in2)
		temp3 (in = in3);

	if in1 and miss_dis = 0 then type = 'None (Complete cases)';
	else if in1 and miss_dis = 1 then type = 'Stochastic regression';
	else if in2 and miss_dis = 1 then type = 'Single value (90 days)';
	else if in3 and miss_dis = 1 then type = 'Single value (30 days)';
	else delete;

	label type = 'Imputation type';
	label std = "Std Dev";
	label q1 ="Lower Quartile";
	label q3 ="Upper Quartile";

	format mean std 4.1;

	drop miss_dis _TYPE_ _FREQ_;

run;


%let file = "&out_dir.\CDDC Urine Supp imputation inpatient days &date..rtf";
ods rtf file = &file Style = Custom startpage = NO BODYTITLE;

Title 'Table S3. Summary of imputation results for length of inpatient stay, in days (VTC group).';
proc print data = tempall label noobs;
var type /style(header data)={just=l};
var n mean std min q1 median q3 max /style(header data)={just=c};
run;

title;
ods text = "^S={LEFTMARGIN = .5in RIGHTMARGIN = 1in}VTC: Voluntary Treatment Center";

ods rtf close;


****************************************************************************;
****************************************************************************;
***********      Find auxillary variables for T0 CDDC         **************;
****************************************************************************;
****************************************************************************;

* Examine associations with missingness on T0 and with T0;

%let v = 16;

/*

proc ttest data = loc.&dat.&v;
	class miss_T0;
	where site = 1;
	var &model_num &maybe_num T:;
run;
quit;

proc corr data = loc.&dat.&v;
	where first and unique_match_base_out and site = 1;
	var T: &model_num &maybe_num ;
run;
quit;

*/

/*Significant assn with missing indicator;
dm6_m 
CS01_craving
ih02_log
ih01_sqrt 

Significant correlation (>.3) with missing var:
IH04_agefirsttime
T3

*/

/*

proc freq data = loc.&dat.&v;
	where site = 1;
	tables miss_T0*(&model_logistic &model_discrim Y:)/chisq;
run;

proc freq data = loc.&dat.&v;
	where site = 1;
	tables miss_T0*(&maybe_char)/chisq;
run;

%let site = 1;
%let var = T0;

%TTESTING(&model_logistic,&var,&site);
%TTESTING(&maybe_char,&var,&site);

%ANOVA(&model_discrim,&var,&site);
%ANOVA(&maybe_discrim,&var,&site);

*/

/*Significant assn with missing indicator;
lifetime_thc
lifetime_polyuse 

Significant relation with missing var:
RA
frequent_heroin
recent_other_opiates
he01 
rbs1a
lifetime_stimulants
daily_heroin

*/

****************************************************************************;
****************************************************************************;
****************         IMPUTE Missing T0 for CDDC           **************;
****************************************************************************;
****************************************************************************;

%let model_num =  		dm0
						ih01_sqrt
						ih02_log
						ih05_totaltimes 
						du16_log 
						du02b_heroin_years 
						cs01_craving
						so_rec_d so_amb_d so_ts_d
						ss_so ss_fam ss_fri;

%let model_discrim = 	dast_cat_m dm2_m;

%let model_logistic = 	dm3_m
						dm4_m
						dm5_m 
						dt01 
						rbd1
						daily_heroin  
						lifetime_alcohol 
						lifetime_other_opiates 
						lifetime_benzos 
						lifetime_stimulants 
						lifetime_moreuse;
/*

* Try the expected model parameters plus auxillary vars;
* Disclude aux vars which are very similar to those already in the model;

proc mi nimpute=1 data = loc.&dat.&v._p out = loc.&dat.&v._pI1 seed=200;
	where site = 1;
	class 	&model_logistic 
			&model_discrim
			RA he01 rbs1a; 
	fcs nbiter = 100 
	regpmm(T0) 
	discrim(&model_logistic &model_discrim RA he01 rbs1a/CLASSEFFECTS=INCLUDE);
	var  T0 &model_num &model_logistic &model_discrim 
			dm6_m  IH04_agefirsttime RA he01 rbs1a;  
run; 

*/

****************************************************************************;

* Add in missing value indicators to facilitate later removal;

%let char_list =	&model_logistic 
					&model_discrim
					RA rbs1a
					Y0_opi Y0_mtd Y0_bzd;

%let num_list =	&model_num dm6_m IH04_agefirsttime;

%let miss_num =;
%let miss_char =;
%let v = 16;
%let dsin = loc.&dat.&v;
%let dsout = loc.&dat.&v._p;

%add_miss_ind(&dsin,&dsout,&num_list,&char_list);

********************* Final imputation model for T0 ************************;

proc mi nimpute=1 data = loc.&dat.&v._p out = loc.&dat.&v._pI1 seed=200;
	where site = 1;
	class 	&model_logistic 
			&model_discrim
			RA rbs1a
			Y0_opi Y0_mtd Y0_bzd; 
	fcs nbiter = 100 
	regpmm(T0) 
	discrim(&model_logistic &model_discrim RA rbs1a/CLASSEFFECTS=INCLUDE);
	var  T0 &model_num &model_logistic &model_discrim Y0_opi Y0_mtd Y0_bzd
			dm6_m IH04_agefirsttime RA rbs1a;  
run; 

****************************************************************************;

* Remove imputed values indicated by flag;

%let dsin = loc.&dat.&v._pI1;
%let dsout = loc.&dat.&v._pI1m;

%make_missing(&dsin,&dsout,&num_list,&char_list);

****************************************************************************;
****************************************************************************;
*************        Sensitivity data for T0 CDDC             **************;
****************************************************************************;
****************************************************************************;

* Sensitivity case where T0 is 0 days after release;

%let v = 16;

data loc.&dat.&v._pI2m;
	set loc.&dat.&v._p;
	where site = 1;

	if T0 = . then T0 = 14;

run;

* Sensitivity case where T0 is 14 days after release;

data loc.&dat.&v._pI3m;
	set loc.&dat.&v._p;
	where site = 1;

	if T0 = . then T0 = 0;

run;

****************************************************************************;
****************************************************************************;
*************       Print summary for  T0 imputation          **************;
****************************************************************************;
****************************************************************************;

proc means data = loc.&dat.&v._pI1m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_T0;
	var T0;
	output out=temp1 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

proc means data = loc.&dat.&v._pI2m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_T0;
	var T0;
	output out=temp2 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

proc means data = loc.&dat.&v._pI3m MEAN STD MIN Q1 MEDIAN Q3 MAX fw=4 noprint;
	class miss_T0;
	var T0;
	output out=temp3 N=N MEAN=Mean STD=STD MIN=Min  Q1=Q1 Median=Median Q3=Q3  MAX=Max;
run;

data tempall;
	length type $30.;
	set temp1 (in = in1)
		temp2 (in = in2)
		temp3 (in = in3);

	if in1 and miss_T0 = 0 then type = 'None (Complete cases)';
	else if in1 and miss_T0 = 1 then type = 'Stochastic regression';
	else if in2 and miss_T0 = 1 then type = 'Single value (14 days)';
	else if in3 and miss_T0 = 1 then type = 'Single value (0 days)';
	else delete;

	label type = 'Imputation type';
	label std = "Std Dev";
	label q1 ="Lower Quartile";
	label q3 ="Upper Quartile";

	format mean std 4.1;

	drop miss_T0 _TYPE_ _FREQ_;

run;

%let file = "&out_dir.\CDDC Urine Supp imputation first outcome time &date..rtf";
ods rtf file = &file Style = custom startpage = NO BODYTITLE;

Title 'Table S4. Summary of imputation results for first outcome measurement time, in days (CDDC group).';
proc print data = tempall label noobs; 
var type /style(header data)={just=l};
var n mean std min q1 median q3 max /style(header data)={just=c};
run;
title;

ods text = "^S={LEFTMARGIN = .20in RIGHTMARGIN = 1in}CDDC: Compulsory Drug Detention Center";

ods rtf close;


****************************************************************************;
****************************************************************************;
************    Merge/Compute time vars using imputed values      **********;
****************************************************************************;
****************************************************************************;

%let v = 16;
%let v2 = %eval(&v + 1);

data loc.&dat.&v2;
	set
	loc.&dat.&v._cI1m (in = inc1)
	loc.&dat.&v._cI2m (in = inc2)
	loc.&dat.&v._cI3m (in = inc3)
	loc.&dat.&v._pI1m (in = inp1)
	loc.&dat.&v._pI2m (in = inp2)
	loc.&dat.&v._pI3m (in = inp3);

	if inc1 then IMP = 'c1';
	if inc2 then IMP = 'c2';
	if inc3 then IMP = 'c3';
	if inp1 then IMP = 'p1';
	if inp2 then IMP = 'p2';
	if inp3 then IMP = 'p3';

	if site = 2 then do;
		* Recompute time vars using Date of discharge as time origin;
		T0 = T0 - T_dis;
		T1 = T1 - T_dis;
		T2 = T2 - T_dis;
		T3 = T3 - T_dis;
		T4 = T4 - T_dis;
		T6 = T6 - T_dis;
		T9 = T9 - T_dis;
		T12 = T12 - T_dis;
		T15 = T15 - T_dis;

		T_int = T_int - T_dis;

	end;

	label site = 'Arm';

	
run;

****************************************************************************;
****************************************************************************;
************    Print summary distributions for time vars     **************;
****************************************************************************;
****************************************************************************;

proc format;
 value arm_ 1="CDDC"
 			2="VTC";
run;

%let file = "&out_dir.\CDDC Urine Supp observation times &date..rtf";
ods rtf file = &file Style = Custom_narrow startpage = NO BODYTITLE;

Title 'Table S5. Summary of outcome measurement timing, in days.';
proc means data = loc.&dat.&v2 N MEAN STD MIN Q1 MEDIAN Q3 MAX fw=3;
	class site;
	where imp in('c1','p1');
	var T0 T1 T2 T3 T4 T6 T9 T12 T15;
	format site arm_.;
run;
title;

%let text = 
"^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}
Note: Table summarizes time (in days) 
 from date of release (CDDC group) 
 or date of discharge (VTC group) for outcome measurements using stochastic 
 regression imputations for missing length of inpatient stay (VTC group) 
 or for missing date of first outcome measurement (CDDC group). Negative 
 measurement times indicate measurements occured prior 
 to discharge (i.e during the inpatient stay).^2n";
ods RTF text = &text;
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}VTC: Voluntary Treatment Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}CDDC: Compulsory Drug Detention Center";


ods rtf close;

****************************************************************************;
****************************************************************************;
************          Remove negative events/times            **************;
****************************************************************************;
****************************************************************************;

*Compute number of urines recorded for each record post discharge/release 
		and remove observations before release/discharge;

%let v = 17;
%let v2 = %eval(&v + 1);
%let v3 = %eval(&v2 + 1);
%let num = 0 1 2 3 4 6 9 12 15;

%macro rename(num,var1,var2,prefix);

	%let k=1;
	%let val = %scan(&num, &k);
	%global &var1.&var2._list &prefix.&var1.&var2._list;
	%let &prefix.&var1.&var2._list =;
	%let &var1.&var2._list =;

	%do %while("&val" NE "");
		
		%let old = &var1&val&var2;
		%let new = &prefix&old;
		rename &old = &new;
		%let &var1.&var2._list = &&&var1.&var2._list &old;
		%let &prefix.&var1.&var2._list = &&&prefix.&var1.&var2._list &new;
		%let k = %eval(&k + 1);
		%let val = %scan(&num, &k);

	%end;

%mend;

* Create a new set of variables with given prefix to use in the shifts below;

data loc.&dat.&v2;
	set loc.&dat.&v;

	%let prefix = n;
	%let var1 = T;
	%let var2 =;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = F;
	%let var2 =;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_opi;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_amp;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_bzd;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_thc;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_mtd;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_bup;
	%rename(&num,&var1,&var2,&prefix);

	%let var1 = Y;
	%let var2 =_hiv;
	%rename(&num,&var1,&var2,&prefix);

run;

data loc.&dat.&v3;
	set loc.&dat.&v2;

	array T [*] &T_list;
	array nT [*] &nT_list;

	array cT [*] cT0 cT1 cT2 cT3 cT4 cT6 cT9 cT12 cT15;

	array F [*] &F_list;
	array nF [*] &nF_list;

	array Y_opi [*] &Y_opi_list;
	array nY_opi [*] &nY_opi_list;

	array Y_amp [*] &Y_amp_list;
	array nY_amp [*] &nY_amp_list;

	array Y_bzd [*] &Y_bzd_list;
	array nY_bzd [*] &nY_bzd_list;

	array Y_thc [*] &Y_thc_list;
	array nY_thc [*] &nY_thc_list;

	array Y_mtd [*] &Y_mtd_list;
	array nY_mtd [*] &nY_mtd_list;

	array Y_bup [*] &Y_bup_list;
	array nY_bup [*] &nY_bup_list;

	array Y_hiv [*] &Y_hiv_list;
	array nY_hiv [*] &nY_hiv_list;

	if site = 1 then do;
		* No change needed, since these all occured 
			after the origin of time;

		do i = 1 to 9;

			T[i] = nT[i];
			F[i] = nF[i];
			Y_opi[i] = nY_opi[i];
			Y_amp[i] = nY_amp[i];
			Y_bzd[i] = nY_bzd[i];
			Y_thc[i] = nY_thc[i];
			Y_mtd[i] = nY_mtd[i];
			Y_bup[i] = nY_bup[i];
			Y_hiv[i] = nY_hiv[i];

			if F[i] = 0 then cT[i] = nT[i];

		end;

	end;

	if site = 2 then do;

		count = 1;
		flag = 0;

		do until (flag OR count > 9);

			if nT[count] >=0 then do;
				* Once a time is found that is greater than/eq zero, 
						set this as first time and shift other
						times/events accordingly;
				flag = 1;
				j = 0;

				do i = count to 9;

					j = j + 1;

					T[j] = nT[i];

					Y_opi[j] = nY_opi[i];
					Y_amp[j] = nY_amp[i];
					Y_bzd[j] = nY_bzd[i];
					Y_thc[j] = nY_thc[i];
					Y_mtd[j] = nY_mtd[i];
					Y_bup[j] = nY_bup[i];
					Y_hiv[j] = nY_hiv[i];

					F[j] = nF[i];

					if F[j] = 0 then cT[j] = nT[i];

				end;

			end;

			count = count + 1;

		end;

	end;

	* Compute the number of outcomes observed for each participant;
	urines_observed = 0;

	do i = 1 to 9;

		urines_observed = (F[i]=0) + urines_observed;

	end;

	drop count i j ;
* nT: nY: nF:;

run;

****************************************************************************;
****************************************************************************;
********        Print summaries of number of measurements        ***********;
****************************************************************************;
****************************************************************************;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c1';
	tables urines_observed / out = temp1;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c2';
	tables urines_observed/ out = temp2;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c3';
	tables urines_observed/ out = temp3;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'p1';
	tables urines_observed/ out = temp4;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'p2';
	tables urines_observed/ out = temp5;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'p3';
	tables urines_observed/ out = temp6;
run;

data tempall;
	merge temp1(rename=(count=v1))
		temp2(rename=(count=v2))
		temp3(rename=(count=v3))
		temp4(rename=(count=v4))
		temp5(rename=(count=v5))
		temp6(rename=(count=v6));
	by urines_observed;

	label v1 = "VTC (SR)";
	label v2 = "VTC (SV90)";
	label v3 = "VTC (SV30)";
	label v4 = "CDDC (SR)";
	label v5 = "CDDC (SV14)";
	label v6 = "CDDC (SV0)";
	label urines_observed = "Number of observations";
	drop percent;
run;


%let file = "&out_dir.\CDDC Urine Supp freq of number of observations &date..rtf";
ods rtf file = &file Style = custom startpage = NO BODYTITLE;

Title 'Table S7. Frequencies of participants with various numbers of outcome measurements post-release/discharge.';
proc print data = tempall noobs label; 
var urines_observed /style(header data)={just=l};
var v1-v6/style(header data)={just=c};
run;
Title;
ods text = "VTC: Voluntary Treatment Center";
ods text = "CDDC: Compulsory Drug Detention Center";
ods text = "SR: Stochastic regression imputation";
ods text = "SV90: Single value imputation of length of inpatient stay (90 days)";
ods text = "SV30: Single value imputation of length of inpatient stay (30 days)";
ods text = "SV14: Single value imputation of time of first outcome measurement (14 days)";
ods text =  "SV0: Single value imputation of time of first outcome measurement (0 days)";

ods rtf close;

proc format library = loc;

value $imp_	 	'c1'='VTC (SR)'
				'c2'='VTC (SV90)'
				'c3'='VTC (SV30)'
				'p1'='CDDC (SR)'
				'p2'='CDDC (SV14)'
				'p3'='CDDC (SV0)';
run;

****************************************************************************;
****************************************************************************;
**********  Summarize freq of obs by missing length of stay ****************;
****************************************************************************;
****************************************************************************;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c1' and miss_dis = 0;
	tables urines_observed/ out = temp1;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c1' and miss_dis = 1;
	tables urines_observed/ out = temp2;
run;

proc freq data = loc.&dat.&v3 noprint;
	where imp = 'c2' and miss_dis = 1;
	tables urines_observed/ out = temp3;
run;

proc freq data = loc.&dat.&v3 noprint;
	where  imp = 'c3' and miss_dis = 1;
	tables urines_observed/ out = temp4;
run;

data tempall;
	merge temp1(rename=(count=v1))
		temp2(rename=(count=v2))
		temp3(rename=(count=v3))
		temp4(rename=(count=v4));
	by urines_observed;

	label v1 = "Known discharge date";
	label v2 = "Unknown discharge date (SR)";
	label v3 = "Unknown discharge date (SV90)";
	label v4 = "Unknown discharge date (SV30)";
	label urines_observed = "Number of observations";
	drop percent;
run;


%let file = "&out_dir.\CDDC Urine Supp freq of obs by miss_dis &date..rtf";
ods rtf file = &file Style = custom startpage = NO BODYTITLE;

Title 'Table S6. Frequencies of participants with various numbers of outcome measurements post-discharge, by knowledge of discharge date (VTC group).';
proc print data = tempall noobs label; 
var urines_observed /style(header data)={just=l};
var v1-v4/style(header data)={just=c};
run;
Title;
ods text = "VTC: Voluntary Treatment Center";
ods text = "SR: Stochastic regression imputation";
ods text = "SV90: Single value imputation of length of inpatient stay (90 days)";
ods text = "SV30: Single value imputation of length of inpatient stay (30 days)";


ods rtf close;

***************************************************************;
/*
* Number of opiate-positive urines for those with no post-discharge measurements;

proc print data = loc.&dat.&v3;
	where imp = 'c1' and urines_observed < 1;
	var client_id nT: nY0_opi nY1_opi nY2_opi nY3_opi;
run;

*/

data loc.&out;
	set loc.&dat.&v3;
run;

****************************************************************************;
****************************************************************************;
*********************   Remove old datasets               ******************;
****************************************************************************;
****************************************************************************;


proc datasets lib=loc nolist;       
	delete &dat.15 - &dat.19 &dat.16:;   
run;
quit;


****************************************************************************;
****************************************************************************;
**************                END                ***************************;
****************************************************************************;
****************************************************************************;

%MEND missing_stage1;

